lab04

Author

Vanessa Ramos

Question 1- Read in the Data

if (!file.exists("met_all.gz"))
  download.file(
    url = "https://raw.githubusercontent.com/USCbiostats/data-science-data/master/02_met/met_all.gz",
    destfile = "met_all.gz",
    method   = "libcurl",
    timeout  = 60
    )
met <- data.table::fread("met_all.gz")

Question 2 - Prepare the data

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
met <- met[met$temp > -10][elev == 9999.0, elev := NA]
met = mutate(met, date = as.Date(paste(year, month, day, sep = "-")))
met$week = week(met$date)
met %>%
    filter(day==1:7)
Warning: There was 1 warning in `filter()`.
ℹ In argument: `day == 1:7`.
Caused by warning in `day == 1:7`:
! longer object length is not a multiple of shorter object length
       USAFID  WBAN year month day hour min   lat      lon elev wind.dir
    1: 690150 93121 2019     8   1    0  56 34.30 -116.166  696      220
    2: 690150 93121 2019     8   1    7  56 34.30 -116.166  696       10
    3: 690150 93121 2019     8   1   14  56 34.30 -116.166  696       NA
    4: 690150 93121 2019     8   1   21  56 34.30 -116.166  696      200
    5: 690150 93121 2019     8   2    5  56 34.30 -116.166  696      260
   ---                                                                  
74528: 726813 94195 2019     8   6   14  56 43.65 -116.633  741       20
74529: 726813 94195 2019     8   6   21  56 43.65 -116.633  741      320
74530: 726813 94195 2019     8   7    5  56 43.65 -116.633  741       NA
74531: 726813 94195 2019     8   7   12  56 43.65 -116.633  741       40
74532: 726813 94195 2019     8   7   19  56 43.65 -116.633  741       NA
       wind.dir.qc wind.type.code wind.sp wind.sp.qc ceiling.ht ceiling.ht.qc
    1:           5              N     5.7          5      22000             5
    2:           5              N     2.1          5      22000             5
    3:           9              C     0.0          5      22000             5
    4:           5              N     7.2          5      22000             5
    5:           5              N     4.6          5      22000             5
   ---                                                                       
74528:           5              N     3.1          5      22000             5
74529:           5              N     3.1          5      22000             5
74530:           9              C     0.0          5      22000             5
74531:           5              N     1.5          5      22000             5
74532:           9              C     0.0          5      22000             5
       ceiling.ht.method sky.cond vis.dist vis.dist.qc vis.var vis.var.qc temp
    1:                 9        N    16093           5       N          5 37.2
    2:                 9        N    16093           5       N          5 28.9
    3:                 9        N    16093           5       N          5 27.8
    4:                 9        N    16093           5       N          5 40.0
    5:                 9        N    16093           5       N          5 31.7
   ---                                                                        
74528:                 9        N    16093           5       N          5 21.1
74529:                 9        N    16093           5       N          5 36.1
74530:                 9        N    16093           5       N          5 23.9
74531:                 9        N    16093           5       N          5 16.7
74532:                 9        N    16093           5       N          5 32.2
       temp.qc dew.point dew.point.qc atm.press atm.press.qc       rh
    1:       5      10.6            5    1009.9            5 19.88127
    2:       5       6.7            5    1012.8            5 24.58578
    3:       5       5.6            5    1015.0            5 24.31468
    4:       5       5.0            5    1011.5            5 11.54872
    5:       5       3.3            5    1012.0            5 16.42942
   ---                                                               
74528:       5      16.1            5    1011.5            5 73.24593
74529:       5       9.4            5    1010.0            5 19.50586
74530:       5      12.2            5    1009.1            5 48.01237
74531:       5      13.3            5    1010.5            5 80.48975
74532:       5      11.7            5    1011.4            5 28.44852
             date week
    1: 2019-08-01   31
    2: 2019-08-01   31
    3: 2019-08-01   31
    4: 2019-08-01   31
    5: 2019-08-02   31
   ---                
74528: 2019-08-06   32
74529: 2019-08-06   32
74530: 2019-08-07   32
74531: 2019-08-07   32
74532: 2019-08-07   32
rhnum <-as.numeric(met$rh)
tempnum <-as.numeric(met$temp)
windnum <-as.numeric(met$wind.sp)
vistnum <-as.numeric(met$vist.dist)
dewnum <-as.numeric(met$dew.point)
latnum<- as.numeric(met$lat)
lonnum<-as.numeric(met$lon)
elevnum<-as.numeric(met$elev)
mean(rhnum)
[1] NA
mean(tempnum)
[1] 23.59178
mean(windnum)
[1] NA
mean(vistnum)
[1] NaN
mean(dewnum)
[1] NA
mean(latnum)
[1] 37.96869
mean(lonnum)
[1] -92.14057
mean(elevnum)
[1] NA
met$region <- ifelse(met$lon > -98 & met$lat>39.71, "NE", 
                              ifelse(met$lon > -98 & met$lat <39.71, "NW",
                                    ifelse(lon<-98 & met$lat>39.71, "SE", "SW" )))
met_avg <- met[,.(
  temp     = mean(temp,na.rm=TRUE),
  rh       = mean(rh,na.rm=TRUE),
  wind.sp  = mean(wind.sp,na.rm=TRUE),
  dew.point = mean(dew.point,na.rm=TRUE),
  
  vis.dist = mean(vis.dist,na.rm=TRUE),
  lat      = mean(lat),
  lon      = mean(lon), 
  elev     = mean(elev,na.rm=TRUE)
), by=c("USAFID", "day")]
met_avg[, elev_cat := ifelse(elev > 252, "high", "low")]
met_avg[, region := ifelse(lon > -98 & lat>39.71, "NE", 
                              ifelse(lon > -98 & lat <39.71, "NW",
                                    ifelse(lon<-98 & met$lat>39.71, "SE", "SW" )))]

**used chat gpt in code above to figure out the ifelse statement format

##Question 3

ggplot(data = met_avg) + 
  geom_violin(mapping = aes(x = dew.point, y = wind.sp)) + 
  facet_wrap(~ region, nrow = 2)
Warning: Removed 397 rows containing non-finite values (`stat_ydensity()`).

It seems as though Northeastern regions have a wider range of dew point by wind speed than any other region. Additionally, SW regions have the smallest range of dew point by wind speed. All regions, however, have dew.point averages around the same wind speeds.

Question 4

library("cowplot")

Attaching package: 'cowplot'
The following object is masked from 'package:lubridate':

    stamp
met_avg[!is.na(dew.point & wind.sp)]
       USAFID day     temp       rh  wind.sp  dew.point vis.dist      lat
    1: 690150   1 32.59583 20.65546 2.895833  6.3000000 15958.92 34.29967
    2: 690150   2 33.54167 13.18436 4.154167  1.2166667 16093.00 34.30000
    3: 690150   3 34.30833 14.20129 4.433333  2.4958333 16025.96 34.30000
    4: 690150   4 34.72083 11.70307 4.245833  0.5541667 16093.00 34.30000
    5: 690150   5 34.77391 13.20827 3.660870  1.4956522 15953.09 34.30000
   ---                                                                   
48317: 726813  27 20.05833 44.96461 1.579167  5.9375000 15891.88 43.65000
48318: 726813  28 21.18333 46.79024 1.175000  6.8041667 15623.67 43.65000
48319: 726813  29 24.32083 49.07651 1.733333 10.8083333 16025.96 43.65000
48320: 726813  30 24.54583 50.43012 3.508333 12.3083333 16025.96 43.64967
48321: 726813  31 24.27917 51.08570 1.208333 11.6875000 16025.96 43.64933
             lon     elev elev_cat region
    1: -116.1657 690.0833     high     SW
    2: -116.1660 696.0000     high     SW
    3: -116.1660 696.0000     high     SW
    4: -116.1660 696.0000     high     SW
    5: -116.1660 696.0000     high     SW
   ---                                   
48317: -116.6330 741.0000     high     SE
48318: -116.6330 741.0000     high     SE
48319: -116.6330 741.0000     high     SE
48320: -116.6331 741.0000     high     SE
48321: -116.6332 741.0000     high     SE
nojitter <- ggplot(data = met[1:1000,]) + 
  geom_point(mapping = aes(x=dew.point, y=wind.sp)) + 
geom_smooth(mapping = aes(x = dew.point, y = wind.sp, linetype = region))
jitter <- ggplot(data = met[1:1000,]) + 
  geom_point(mapping = aes(x=dew.point, y=wind.sp, fill=region), position = "jitter")
plot_grid(nojitter, jitter, labels = "AUTO") 
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Warning: Removed 12 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 12 rows containing missing values (`geom_point()`).
Removed 12 rows containing missing values (`geom_point()`).

Although coloring is set for all regions, only southwestern regions are apparent on the graph. In the graph with jitter, there are more obvious patterns of associations. With the added line, you can easily visualize that as dew point increases, wind speed decreases.

Question 5

met_avg[!is.na(elev_cat)] %>%
  ggplot() + 
  geom_bar(mapping = aes(x = elev_cat, color= region, fill = USAFID), position = "dodge") + scale_fill_brewer(palette = "Set2") + 
    labs(title = "Accent")
Warning: The following aesthetics were dropped during statistical transformation: fill
ℹ This can happen when ggplot fails to infer the correct grouping structure in
  the data.
ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
  variable into a factor?

The graph demonstrates that hight and low elevation levels have different variations of sites by region. High elevations have more sites in the Northeast and Southwest areas, while low elevation levels have more sites in the Northeast and Northwest areas.

Question 6

l <- met_avg[!is.na(wind.sp & dew.point) ] %>%
  ggplot() + 
    stat_summary(mapping = aes(x = dew.point, y = wind.sp, color=region),
    fun.data="mean_sdl",             
    fun.min = min,
    fun.max = max,
    fun = median)
l
Warning: Removed 32121 rows containing missing values (`geom_segment()`).

The dew point for all regions seems to land around the same wind speed (between 0 and 10). Northwest regions have slighly larger temperatures than other regions, but this does not seem to be significantly associated.

Question 7

library(leaflet)
met_avg[!is.na(rh)] 
       USAFID day     temp       rh  wind.sp  dew.point vis.dist      lat
    1: 690150   1 32.59583 20.65546 2.895833  6.3000000 15958.92 34.29967
    2: 690150   2 33.54167 13.18436 4.154167  1.2166667 16093.00 34.30000
    3: 690150   3 34.30833 14.20129 4.433333  2.4958333 16025.96 34.30000
    4: 690150   4 34.72083 11.70307 4.245833  0.5541667 16093.00 34.30000
    5: 690150   5 34.77391 13.20827 3.660870  1.4956522 15953.09 34.30000
   ---                                                                   
48694: 726813  27 20.05833 44.96461 1.579167  5.9375000 15891.88 43.65000
48695: 726813  28 21.18333 46.79024 1.175000  6.8041667 15623.67 43.65000
48696: 726813  29 24.32083 49.07651 1.733333 10.8083333 16025.96 43.65000
48697: 726813  30 24.54583 50.43012 3.508333 12.3083333 16025.96 43.64967
48698: 726813  31 24.27917 51.08570 1.208333 11.6875000 16025.96 43.64933
             lon     elev elev_cat region
    1: -116.1657 690.0833     high     SW
    2: -116.1660 696.0000     high     SW
    3: -116.1660 696.0000     high     SW
    4: -116.1660 696.0000     high     SW
    5: -116.1660 696.0000     high     SW
   ---                                   
48694: -116.6330 741.0000     high     SE
48695: -116.6330 741.0000     high     SE
48696: -116.6330 741.0000     high     SE
48697: -116.6331 741.0000     high     SE
48698: -116.6332 741.0000     high     SE
met2 <- met_avg[,.(rh = mean(rh,na.rm=TRUE), lat = mean(lat), lon = mean(lon)),  by=c("USAFID")]
# Generating a color palette
rh.pal <- colorNumeric(c('darkgreen','goldenrod','brown'), domain=met2$dew.point)
rh.pal
function (x) 
{
    if (length(x) == 0 || all(is.na(x))) {
        return(pf(x))
    }
    if (is.null(rng)) 
        rng <- range(x, na.rm = TRUE)
    rescaled <- scales::rescale(x, from = rng)
    if (any(rescaled < 0 | rescaled > 1, na.rm = TRUE)) 
        warning("Some values were outside the color scale and will be treated as NA")
    if (reverse) {
        rescaled <- 1 - rescaled
    }
    pf(rescaled)
}
<bytecode: 0x7fd626eff900>
<environment: 0x7fd626f0bbd8>
attr(,"colorType")
[1] "numeric"
attr(,"colorArgs")
attr(,"colorArgs")$na.color
[1] "#808080"
rhmap <- leaflet(met2) %>%
  # The looks of the Map
  addProviderTiles('CartoDB.Positron') %>%
  # Some circles
  addCircles(
    lat = ~lat, lng=~lon,
                                                  # HERE IS OUR PAL!
    label = ~paste0(round(rh,2), ' C'), color = ~rh.pal(rh),
    opacity = 1, fillOpacity = 1, radius = 500
    ) %>%
  # And a pretty legend
  addLegend('bottomleft', pal=rh.pal, values=met2$rh,
          title='Relative Humidity, C', opacity=1) %>%
addMarkers(data=met2[rank(-rh) <= 10])
Assuming "lon" and "lat" are longitude and latitude, respectively

**used chatgpt to learn how to use the %>% symbol

rhmap

The relative humidity is highest on the western coast and eastern and mid-eastern states. As you approach the west, humidity drops until you reach the western coast.

Question 8

library("ggplot2")
library("gganimate")
base <- 
ggplot (data=met_avg) +
geom_point(mapping=aes(x=dew.point, y=wind.sp))
plot(base)
Warning: Removed 397 rows containing missing values (`geom_point()`).

newmap <- base + 
   transition_states(
    region,
    transition_length = 2,
    state_length = 1
  ) +
  enter_fade() + 
  exit_shrink() +
  ease_aes('sine-in-out')
newmap
Warning: Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Removed 89 rows containing missing values (`geom_point()`).
Warning: Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Removed 313 rows containing missing values (`geom_point()`).
Warning: Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Removed 235 rows containing missing values (`geom_point()`).
Warning: Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Removed 248 rows containing missing values (`geom_point()`).
Warning: Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Removed 13 rows containing missing values (`geom_point()`).
Warning: Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Removed 73 rows containing missing values (`geom_point()`).
Warning: Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Removed 60 rows containing missing values (`geom_point()`).
Warning: Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Removed 149 rows containing missing values (`geom_point()`).
Warning: Removed 89 rows containing missing values (`geom_point()`).

plot(newmap)
Warning: Removed 248 rows containing missing values (`geom_point()`).